home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- ''' A SAX2 driver for libxml2, on top of its XmlReader API
-
- USAGE
- # put this file (drv_libxml2.py) in PYTHONPATH
- import xml.sax
- reader = xml.sax.make_parser(["drv_libxml2"])
- # ...and the rest is standard python sax.
-
- CAVEATS
- - Lexical handlers are supported, except for start/endEntity
- (waiting for XmlReader.ResolveEntity) and start/endDTD
- - Error callbacks are not exactly synchronous, they tend
- to be invoked before the corresponding content callback,
- because the underlying reader interface parses
- data by chunks of 512 bytes
-
- TODO
- - search for TODO
- - some ErrorHandler events (warning)
- - some ContentHandler events (setDocumentLocator, skippedEntity)
- - EntityResolver (using libxml2.?)
- - DTDHandler (if/when libxml2 exposes such node types)
- - DeclHandler (if/when libxml2 exposes such node types)
- - property_xml_string?
- - feature_string_interning?
- - Incremental parser
- - additional performance tuning:
- - one might cache callbacks to avoid some name lookups
- - one might implement a smarter way to pass attributes to startElement
- (some kind of lazy evaluation?)
- - there might be room for improvement in start/endPrefixMapping
- - other?
-
- '''
- __author__ = u'Stéphane Bidoul <sbi@skynet.be>'
- __version__ = '0.3'
- import codecs
- from types import StringType, UnicodeType
- StringTypes = (StringType, UnicodeType)
- from xml.sax._exceptions import *
- from xml.sax import xmlreader, saxutils
- from xml.sax.handler import feature_namespaces, feature_namespace_prefixes, feature_string_interning, feature_validation, feature_external_ges, feature_external_pes, property_lexical_handler, property_declaration_handler, property_dom_node, property_xml_string
- _decoder = codecs.lookup('utf8')[1]
-
def _d(s):
    """Decode a UTF-8 byte string to unicode, passing ``None`` through.

    Used on the names and values read from the libxml2 reader before they
    are handed to the SAX handlers.
    """
    if s is not None:
        # _decoder returns (decoded_text, bytes_consumed); keep the text.
        return _decoder(s)[0]
    return s
-
-
# libxml2 is an optional dependency: report its absence through the SAX
# exception that xml.sax.make_parser() understands, so it can fall back to
# another driver.
try:
    import libxml2
except ImportError as e:
    # Bind the real exception so the message carries the import failure
    # reason (the decompiled text had lost it and always printed "None").
    raise SAXReaderNotAvailable('libxml2 not available: import error was: %s' % e)
-
-
class Locator(xmlreader.Locator):
    """SAX Locator adapter for libxml2.xmlTextReaderLocator."""

    def __init__(self, locator):
        """Wrap *locator*, the object libxml2 passes to the error callback."""
        self.__locator = locator

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        Always -1: this adapter does not obtain a column from the wrapped
        locator.
        """
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return self.__locator.LineNumber()

    def getPublicId(self):
        """Return the public identifier for the current event.

        Always None: no public identifier is read from the wrapped locator.
        """
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        This is the wrapped locator's base URI.
        """
        return self.__locator.BaseURI()
-
-
-
class LibXml2Reader(xmlreader.XMLReader):
    """SAX2 XMLReader that pulls events from a libxml2 text reader.

    Content events go to the content handler, CDATA/comment events to the
    lexical handler (when one is set through ``property_lexical_handler``),
    and libxml2 error callbacks are queued and forwarded to the error
    handler after each ``Read()``.
    """

    def __init__(self):
        xmlreader.XMLReader.__init__(self)
        # Feature flags (see getFeature/setFeature).
        self.__ns = 0          # feature_namespaces
        self.__nspfx = 0       # feature_namespace_prefixes
        self.__validate = 0    # feature_validation
        self.__extparams = 1   # feature_external_pes
        # Non-zero while parse() is running; blocks setFeature().
        self.__parsing = 0
        # Optional handlers set through setProperty().
        self.__lex_handler = None
        self.__decl_handler = None
        # Queue of (severity, SAXParseException); None when empty.
        self.__errors = None

    def _errorHandler(self, arg, msg, severity, locator):
        """libxml2 error callback: queue the error for _reportErrors().

        *arg* is the user data registered with SetErrorHandler (unused).
        """
        if self.__errors is None:
            self.__errors = []
        self.__errors.append(
            (severity, SAXParseException(msg, None, Locator(locator))))

    def _reportErrors(self, fatal):
        """Forward queued errors to the error handler and clear the queue.

        Warnings go to ``warning()``. When *fatal* is true the last queued
        exception goes to ``fatalError()``; everything else goes to
        ``error()``. Must only be called while the queue is not None.
        """
        pending = self.__errors
        last_exception = pending[-1][1]
        warning_levels = (libxml2.PARSER_SEVERITY_VALIDITY_WARNING,
                          libxml2.PARSER_SEVERITY_WARNING)
        for severity, exception in pending:
            if severity in warning_levels:
                self._err_handler.warning(exception)
            elif fatal and exception is last_exception:
                self._err_handler.fatalError(exception)
            else:
                self._err_handler.error(exception)
        self.__errors = None

    def parse(self, source):
        """Parse *source* and emit SAX events.

        *source* is either a string (handed to libxml2 as a filename) or
        anything accepted by ``saxutils.prepare_input_source``.
        ``endDocument()`` is only emitted when the reader reaches the end
        of input without a fatal read error.
        """
        self.__parsing = 1
        try:
            # Create the reader.
            if isinstance(source, StringTypes):
                reader = libxml2.newTextReaderFilename(source)
            else:
                source = saxutils.prepare_input_source(source)
                input_buffer = libxml2.inputBuffer(source.getByteStream())
                reader = input_buffer.newTextReader(source.getSystemId())
            reader.SetErrorHandler(self._errorHandler, None)

            # Configure it from the feature flags.
            if self.__extparams:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
                reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
                reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
                reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
            else:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 0)

            # Stack with one list of declared prefixes per open element
            # (only used when namespace processing is on).
            prefixes = []
            self._cont_handler.startDocument()
            while True:
                r = reader.Read()
                if r == 1:
                    # A node is available; flush non-fatal errors first.
                    if self.__errors is not None:
                        self._reportErrors(0)
                elif r == 0:
                    # End of input.
                    if self.__errors is not None:
                        self._reportErrors(0)
                    break
                else:
                    # Read failure: the last queued error is the fatal one.
                    if self.__errors is not None:
                        self._reportErrors(1)
                    else:
                        self._err_handler.fatalError(
                            SAXException('Read failed (no details available)'))
                    break
                self._dispatchNode(reader, prefixes)
            if r == 0:
                self._cont_handler.endDocument()
            reader.Close()
        finally:
            self.__parsing = 0

    def _dispatchNode(self, reader, prefixes):
        """Translate the reader's current node into SAX callbacks."""
        nodeType = reader.NodeType()
        handler = self._cont_handler
        lex = self.__lex_handler
        if nodeType == 1:       # Element
            if self.__ns:
                self._startElementNS(reader, prefixes)
            else:
                self._startElement(reader)
        elif nodeType == 15:    # EndElement
            if self.__ns:
                handler.endElementNS(
                    (_d(reader.NamespaceUri()), _d(reader.LocalName())),
                    _d(reader.Name()))
                for prefix in prefixes.pop():
                    handler.endPrefixMapping(prefix)
            else:
                handler.endElement(_d(reader.Name()))
        elif nodeType == 3:     # Text
            handler.characters(_d(reader.Value()))
        elif nodeType == 13:    # Whitespace
            handler.ignorableWhitespace(_d(reader.Value()))
        elif nodeType == 14:    # SignificantWhitespace
            handler.characters(_d(reader.Value()))
        elif nodeType == 4:     # CDATA
            if lex is not None:
                lex.startCDATA()
            handler.characters(_d(reader.Value()))
            if lex is not None:
                lex.endCDATA()
        elif nodeType == 5:     # EntityReference
            if lex is not None:
                self._notifyEntity('startEntity', _d(reader.Name()))
            # NOTE(review): the module notes say ResolveEntity may not be
            # exposed by the libxml2 binding -- confirm before relying on it.
            reader.ResolveEntity()
        elif nodeType == 16:    # EndEntity
            if lex is not None:
                self._notifyEntity('endEntity', _d(reader.Name()))
        elif nodeType == 7:     # ProcessingInstruction
            handler.processingInstruction(
                _d(reader.Name()), _d(reader.Value()))
        elif nodeType == 8:     # Comment
            if lex is not None:
                lex.comment(_d(reader.Value()))
        elif nodeType in (10, 17, 6, 12):
            # DocumentType, XmlDeclaration, Entity, Notation: no SAX event
            # is emitted for these (TODO).
            pass
        else:
            raise SAXException('Unexpected node type %d' % nodeType)

    def _notifyEntity(self, event, name):
        """Call ``lex_handler.<event>(name)`` if the handler implements it.

        The reader itself has no startEntity/endEntity methods, and not
        every lexical handler provides them, so the call is optional.
        """
        callback = getattr(self.__lex_handler, event, None)
        if callback is not None:
            callback(name)

    def _startElementNS(self, reader, prefixes):
        """Emit startPrefixMapping/startElementNS for the current element.

        For an empty element the matching end events are emitted here too,
        because no EndElement node is dispatched for it; otherwise the
        declared prefixes are pushed on *prefixes* for the EndElement node.
        """
        handler = self._cont_handler
        eltName = (_d(reader.NamespaceUri()), _d(reader.LocalName()))
        eltQName = _d(reader.Name())
        attrs = {}
        qnames = {}
        newPrefixes = []
        while reader.MoveToNextAttribute() == 1:
            qname = _d(reader.Name())
            value = _d(reader.Value())
            if qname == 'xmlns' or qname.startswith('xmlns:'):
                # 'xmlns' declares the default namespace (prefix None),
                # 'xmlns:p' declares prefix 'p'.
                if qname == 'xmlns':
                    newPrefix = None
                else:
                    newPrefix = qname[6:]
                newPrefixes.append(newPrefix)
                handler.startPrefixMapping(newPrefix, value)
                if not self.__nspfx:
                    # Namespace declarations are only reported as
                    # attributes when feature_namespace_prefixes is on.
                    continue
            attName = (_d(reader.NamespaceUri()), _d(reader.LocalName()))
            qnames[attName] = qname
            attrs[attName] = value
        reader.MoveToElement()
        handler.startElementNS(
            eltName, eltQName, xmlreader.AttributesNSImpl(attrs, qnames))
        if reader.IsEmptyElement() == 1:
            handler.endElementNS(eltName, eltQName)
            for newPrefix in newPrefixes:
                handler.endPrefixMapping(newPrefix)
        else:
            prefixes.append(newPrefixes)

    def _startElement(self, reader):
        """Emit startElement (and endElement for an empty element)."""
        handler = self._cont_handler
        eltName = _d(reader.Name())
        attrs = {}
        while reader.MoveToNextAttribute() == 1:
            attrs[_d(reader.Name())] = _d(reader.Value())
        reader.MoveToElement()
        handler.startElement(eltName, xmlreader.AttributesImpl(attrs))
        if reader.IsEmptyElement() == 1:
            handler.endElement(eltName)

    def setDTDHandler(self, handler):
        """Not supported: always raises SAXNotSupportedException."""
        raise SAXNotSupportedException('DTDHandler not supported')

    def setEntityResolver(self, resolver):
        """Not supported: always raises SAXNotSupportedException."""
        raise SAXNotSupportedException('EntityResolver not supported')

    def getFeature(self, name):
        """Return the current state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature.
        """
        if name == feature_namespaces:
            return self.__ns
        if name == feature_namespace_prefixes:
            return self.__nspfx
        if name == feature_validation:
            return self.__validate
        if name == feature_external_ges:
            return 1  # external general entities cannot be turned off
        if name == feature_external_pes:
            return self.__extparams
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while a parse is in progress or
        when trying to disable feature_external_ges, and
        SAXNotRecognizedException for an unknown feature.
        """
        if self.__parsing:
            raise SAXNotSupportedException(
                'Cannot set feature %s while parsing' % name)
        if name == feature_namespaces:
            self.__ns = state
        elif name == feature_namespace_prefixes:
            self.__nspfx = state
        elif name == feature_validation:
            self.__validate = state
        elif name == feature_external_ges:
            if state == 0:
                raise SAXNotSupportedException(
                    "Feature '%s' not supported" % name)
        elif name == feature_external_pes:
            self.__extparams = state
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        Raises SAXNotRecognizedException for an unknown property.
        """
        if name == property_lexical_handler:
            return self.__lex_handler
        if name == property_declaration_handler:
            return self.__decl_handler
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        Only the lexical handler can be set. The declaration handler is
        recognized but raises SAXNotSupportedException; any other name
        raises SAXNotRecognizedException.
        """
        if name == property_lexical_handler:
            self.__lex_handler = value
        elif name == property_declaration_handler:
            raise SAXNotSupportedException(
                "Property '%s' not supported" % name)
        else:
            raise SAXNotRecognizedException(
                "Property '%s' not recognized" % name)
-
-
def create_parser():
    """Return a new LibXml2Reader.

    The module usage notes load this driver through
    ``xml.sax.make_parser(["drv_libxml2"])``.
    """
    parser = LibXml2Reader()
    return parser
-
-